# Split train into train and validation

# Read the train and test CSVs; only `data` is partitioned below.
data <- read.csv("../Data/train.csv")
test <- read.csv("../Data/test.csv")

# 80% of the rows of `data` go to `train`; the remaining rows form `validation`.
smp_size <- floor(0.8 * nrow(data))
## set the seed to make the partition reproducible
set.seed(123)
train_ind <- sample(seq_len(nrow(data)), size = smp_size)
train <- data[train_ind, ]
# Pick the hold-out rows by positive index. `data[-train_ind, ]` would return
# zero rows (instead of every row) whenever `train_ind` is empty, because
# negative indexing with a zero-length vector selects nothing.
validation_ind <- setdiff(seq_len(nrow(data)), train_ind)
validation <- data[validation_ind, ]

# Analysis

## Latitude, longitude plot

library(ggplot2)

# Axis limits shared by every scatter plot in this section:
# latitude is drawn on x, longitude on y.
lat_limits <- xlim(40.5, 41.0)
lon_limits <- ylim(-74.05, -73.75)

# All listings, coloured by interest level.
# NOTE(review): the parentheses around interest_level are kept on purpose;
# the default legend title is presumably derived from the expression text,
# so confirm before simplifying.
ggplot(data = data) +
  geom_point(
    aes(x = latitude, y = longitude, color = (interest_level)),
    alpha = 1
  ) +
  lat_limits +
  lon_limits

# Scatter plot of the rows of `data` whose interest_level equals `level`.
plot_interest_level <- function(level) {
  level_rows <- data[data["interest_level"] == level, ]
  ggplot(data = level_rows) +
    geom_point(aes(x = latitude, y = longitude), alpha = 1) +
    lat_limits +
    lon_limits
}

# One plot per interest level, in the order low, medium, high.
plot_interest_level("low")

plot_interest_level("medium")

plot_interest_level("high")

## Latitude, longitude and price

library(plotly)

# 3D scatter of latitude (x), longitude (y) and price (z), coloured by
# interest level. Only rows with longitude strictly between -74 and -73.9 and
# price below 20000 are plotted.
#
# Layout attributes must reach layout() as named arguments; wrapped in a bare,
# unnamed list() they do not appear to be merged into the figure. Because x, y
# and z are all mapped, the axes belong to the 3D scene, so the fixed ranges
# are set under `scene` rather than at the top level of the layout.
in_view <- data$longitude > -74 & data$longitude < -73.9 & data$price < 20000
p <- plot_ly(
  data[in_view, ],
  x = ~latitude,
  y = ~longitude,
  z = ~price,
  color = ~interest_level,
  colors = c("#BF382A", "#0C4B8E", "#F0E442")
) %>%
  add_markers() %>%
  layout(
    scene = list(
      xaxis = list(range = c(40, 41), autorange = FALSE),
      yaxis = list(range = c(-74, -73), autorange = FALSE)
    )
  )
p
LS0tCnRpdGxlOiAiYW5hbHlzaXMiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCgoKCiMgc3BsaXQgdHJhaW4gaW50byB0cmFpbiBhbmQgdmFsaWRhdGlvbgoKYGBge3J9CgpkYXRhID0gcmVhZC5jc3YoIi4uL0RhdGEvdHJhaW4uY3N2IikKdGVzdCA9IHJlYWQuY3N2KCIuLi9EYXRhL3Rlc3QuY3N2IikKCnNtcF9zaXplIDwtIGZsb29yKDAuOCAqIG5yb3coZGF0YSkpCiMjIHNldCB0aGUgc2VlZCB0byBtYWtlIHlvdXIgcGFydGl0aW9uIHJlcHJvZHVjdGlibGUKc2V0LnNlZWQoMTIzKQp0cmFpbl9pbmQgPC0gc2FtcGxlKHNlcV9sZW4obnJvdyhkYXRhKSksIHNpemUgPSBzbXBfc2l6ZSkKdHJhaW4gPC0gZGF0YVt0cmFpbl9pbmQsIF0KdmFsaWRhdGlvbiA8LSBkYXRhWy10cmFpbl9pbmQsIF0KYGBgCgoKCiMgYW5hbHlzaXMKCiMjIGxhdHRpdHVkZSAsIGxvbmdpdHVkZSBwbG90CgoKYGBge3J9CmxpYnJhcnkoZ2dwbG90MikKCgpnZ3Bsb3QoZGF0YSA9IGRhdGEpICsKICBnZW9tX3BvaW50KGFlcyh4ID0gbGF0aXR1ZGUsIHkgPSBsb25naXR1ZGUsIGNvbG9yID0oaW50ZXJlc3RfbGV2ZWwpKSwgYWxwaGEgPSAxKSsKICAgIHhsaW0oNDAuNSwgNDEuMCkgKwogICAgeWxpbSgtNzQuMDUsIC03My43NSkKCgpnZ3Bsb3QoZGF0YSA9IGRhdGFbZGF0YVsiaW50ZXJlc3RfbGV2ZWwiXSA9PSAibG93IixdKSArCiAgZ2VvbV9wb2ludChhZXMoeCA9IGxhdGl0dWRlLCB5ID0gbG9uZ2l0dWRlKSwgYWxwaGEgPSAxKSsKICAgIHhsaW0oNDAuNSwgNDEuMCkgKwogICAgeWxpbSgtNzQuMDUsIC03My43NSkKCmdncGxvdChkYXRhID0gZGF0YVtkYXRhWyJpbnRlcmVzdF9sZXZlbCJdID09ICJtZWRpdW0iLF0pICsKICBnZW9tX3BvaW50KGFlcyh4ID0gbGF0aXR1ZGUsIHkgPSBsb25naXR1ZGUpLCBhbHBoYSA9IDEpKwogICAgeGxpbSg0MC41LCA0MS4wKSArCiAgICB5bGltKC03NC4wNSwgLTczLjc1KQoKZ2dwbG90KGRhdGEgPSBkYXRhW2RhdGFbImludGVyZXN0X2xldmVsIl0gPT0gImhpZ2giLF0pICsKICBnZW9tX3BvaW50KGFlcyh4ID0gbGF0aXR1ZGUsIHkgPSBsb25naXR1ZGUpLCBhbHBoYSA9IDEpKwogICAgeGxpbSg0MC41LCA0MS4wKSArCiAgICB5bGltKC03NC4wNSwgLTczLjc1KQoKCmBgYAoKCgoKIyMgbGF0dGl0dWRlLCBsb25naXR1ZGUgYW5kIAoKCmBgYHtyfQpsaWJyYXJ5KHBsb3RseSkKCnAgPC0gcGxvdF9seShkYXRhW2RhdGEkbG9uZ2l0dWRlID4gLTc0ICYgZGF0YSRsb25naXR1ZGUgPCAtNzMuOSAmIGRhdGEkcHJpY2UgPCAyMDAwMCxdLCB4ID0gfmxhdGl0dWRlLCB5ID0gfmxvbmdpdHVkZSwgeiA9IH5wcmljZSwgY29sb3IgPSB+aW50ZXJlc3RfbGV2ZWwsIGNvbG9ycyA9IGMoJyNCRjM4MkEnLCAnIzBDNEI4RScsICIjRjBFNDQyIikpICU+JQogIGFkZF9tYXJrZXJzKCkgJT4lCiAgbGF5b3V0KGxpc3QoICAgCiAgICAgICAgIHhheGlzID0gbGlzdCggcmFuZ2UgPWMoNDAsIDQxKSwgYXV0b3JhbmdlID0gRiksCiAgICAgICAgIHlh
eGlzID0gbGlzdCggcmFuZ2UgPSBjKC03NCwgLTczKSwgYXV0b3JhbmdlID0gRikKICAgICAgICAgCiAgICAgICAgIAogICAgICAgICAgIAogICAgICAgICkpCnAKYGBgCgo=